Creating the figures for the ASLPrep paper

Here we create all the figures with data plotted. The data reflect an expanded sample included after the initial revision.

all_data <- read.csv('CBFData.csv')
names(all_data)
##  [1] "sub"      "Datasets" "CBF.GM"   "CBF.WM"   "CBFTYPE"  "QEI"     
##  [7] "NEG_CBF"  "FD"       "AGE"      "CBF_R"    "sex"
table(all_data$Datasets, all_data$CBFTYPE)
##      
##       BASIL  PVC SCBF SCRUB
##   AGE    63   63   63    63
##   FTD   110  110  110     0
##   IRR   214  214  214   214
##   NKI  1564 1564 1564  1564
##   PNC  1488 1488 1488  1488
all_data = na.omit(all_data)
all_data = subset(all_data, 
  (CBF.GM < 120) & 
  (CBF.GM > 5) & 
  (CBF.GM / CBF.WM > 1) & 
  (FD < 1))
all_data = all_data[!duplicated(all_data),]
table(all_data$Datasets, all_data$CBFTYPE)
##      
##       BASIL  PVC SCBF SCRUB
##   AGE    63   63   63    63
##   FTD   108  108  109     0
##   IRR   206  206  199   208
##   NKI  1549 1198 1547  1548
##   PNC  1486 1441 1481  1483
# Final demographics
scbf_only <- subset(all_data, CBFTYPE=="SCBF")
table(scbf_only$Datasets)
## 
##  AGE  FTD  IRR  NKI  PNC 
##   63  109  199 1547 1481
table(scbf_only$Datasets, scbf_only$sex, useNA="ifany")
##      
##         F   M
##   AGE  35  28
##   FTD  51  58
##   IRR 125  74
##   NKI 910 637
##   PNC 779 702
ddply(scbf_only, .(Datasets), summarise,
      mean_age=mean(AGE), sd_age=sd(AGE), 
      min_age=min(AGE), max_age=max(AGE) )
##   Datasets mean_age    sd_age   min_age  max_age
## 1      AGE 48.98413 24.408150 19.000000 84.00000
## 2      FTD 53.13761 15.514908 18.000000 79.00000
## 3      IRR 19.45729  3.937252 10.000000 30.00000
## 4      NKI 37.98643 22.363236  6.000000 85.00000
## 5      PNC 15.13262  3.604071  8.166667 23.08333
nrow(scbf_only)
## [1] 3399
rm(scbf_only)

Figure 2

Fig. 2 | ASLPrep quantifies CBF across sequences, scanners and the lifespan. a, CBF in GM and WM for each dataset. Boxes in each violin plot indicate interquartile range with the median shown as a white dot. b, GM CBF across the lifespan. The thick black line represents the predicted values from a generalized additive model; the dashed lines indicate the 95% confidence interval (R2 = 0.50; P = 1.1 × 10−16)

datagm = all_data
datagm$CBF.WM =NULL
datagm$TP ='GM'
datagm$CBF=datagm$CBF.GM 
datagm$CBF.GM = NULL 

datawm = all_data
datawm$CBF.GM =NULL
datawm$TP ='WM'
datawm$CBF=datawm$CBF.WM  
datawm$CBF.WM = NULL 

datasets = rbind(datagm,datawm)

datasetsy = datasets[datasets$CBFTYPE =='SCBF',]
datasetsy = na.omit(datasetsy)
dodge <- position_dodge(width = 0.5)
dp <- ggplot(datasetsy, aes(x=Datasets,y=CBF,fill=TP),plot = FALSE) + 
  geom_violin(position = dodge,width = 1.4)+
  geom_boxplot(width=.1, outlier.colour=NA, position = dodge) 
dp=dp + theme_classic() + scale_x_discrete(limits = rev) + 
  labs(x = "Datasets", y = "CBF(mL/100g/min)") +
  theme(axis.title.x = element_text(size = rel(1.2))) +
  theme(axis.title.y = element_text(size = rel(1.2) ,vjust=-0.6)) + ylim(0,140) +
  theme(axis.text = element_text(size = rel(1.4))) + theme(axis.line = element_line(colour = 'black', size = 0.5)) +
  scale_fill_manual(values=c("#d95f0e","#756bb1")) + theme(legend.title = element_blank())

datay = all_data[all_data$CBFTYPE =='SCBF',]
pnc = datay[datay$Datasets=='PNC',]
nki = datay[datay$Datasets=='NKI',]
age = datay[datay$Datasets=='AGE',]
irr = datay[datay$Datasets=='IRR',]
ftd = datay[datay$Datasets=='FTD',]

#############################################################
cbf_Age_gam <- gam(CBF.GM ~ s(AGE, k=4) + sex + FD, method="REML", data = datay)

#####################
## Look at results ##
#####################
summary(cbf_Age_gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## CBF.GM ~ s(AGE, k = 4) + sex + FD
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  65.8445     0.4114 160.049  < 2e-16 ***
## sexM         -6.1628     0.4492 -13.719  < 2e-16 ***
## FD           -5.9500     1.5465  -3.848 0.000122 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##          edf Ref.df    F p-value    
## s(AGE) 2.994      3 1325  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.541   Deviance explained = 54.2%
## -REML =  13493  Scale est. = 163.92    n = 3399
## Nonlinear age effect
Age_pval <- summary(cbf_Age_gam)$s.table[1,4]
Age_pval
## [1] 0
####################################
## Visualize Nonlinear Age Effect ##
####################################
plotdata <- visreg(cbf_Age_gam,'AGE',type = "conditional",scale = "linear", plot = FALSE)
smooths <- data.frame(Variable = plotdata$meta$x, 
                      x=plotdata$fit[[plotdata$meta$x]], 
                      smooth=plotdata$fit$visregFit, 
                      lower=plotdata$fit$visregLwr, 
                      upper=plotdata$fit$visregUpr)
predicts <- data.frame(Variable = "dim1", 
                       x=plotdata$res$AGE,
                       y=plotdata$res$visregRes)

CBF_Age_plot <- ggplot() + xlim(8, 85)+ ylim(0,140) +
  #theme(legend.position = "none")  +
  labs(x = "Age (years)", y = "CBF(mL/100g/min)",) +
  theme(axis.title.x = element_text(size = rel(1.2))) + 
  theme(axis.title.y = element_blank(),axis.ticks.y = element_blank(),axis.text.y = element_blank()) +
  theme(axis.text = element_text(size = rel(1.2))) + theme(axis.line = element_line(colour = 'black', size = 0.5), axis.ticks.length = unit(.25, "cm")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
  geom_point(data = predicts, aes(x, y), colour = "darksalmon", alpha=0.7 ) +  
  #geom_point(data=datay,aes(x=AGE, y=CBF.GM),size=1)+ 
  geom_point(data=pnc,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill='#e34a33', color="white")+ 
  geom_point(data=nki,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#3182bd", color="white")+ 
  geom_point(data=irr,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#31a354", color="white")+ 
  geom_point(data=ftd,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#d95f02", color="white")+ 
  geom_point(data=age,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#c51b8a", color="white")+ 
  geom_line(data = smooths, aes(x = x, y = smooth), colour = "midnightblue",size=1) +
  geom_line(data = smooths, aes(x = x, y=lower), linetype="dashed", colour = "midnightblue", alpha = 0.9, size = 0.9) + 
  geom_line(data = smooths, aes(x = x, y=upper), linetype="dashed",colour = "midnightblue", alpha = 0.9, size = 0.9) +
  coord_cartesian(xlim = c(10.5,85), ylim = c(0,140))
  
dir.create("Figure2")
## Warning in dir.create("Figure2"): 'Figure2' already exists
figure <- ggarrange(dp,CBF_Age_plot,
                    ncol = 2, nrow = 1,widths=c(1.5,1) )
## Warning: position_dodge requires non-overlapping x intervals
## Warning: Removed 8 rows containing missing values (geom_point).
## Removed 8 rows containing missing values (geom_point).
## Warning: Removed 3 row(s) containing missing values (geom_path).
## Removed 3 row(s) containing missing values (geom_path).
## Removed 3 row(s) containing missing values (geom_path).
ggsave("Figure2/Figure2.pdf", width=7, units="in", height=3,dpi=800)
write.csv(all_data, "Figure2/Figure2Data.csv")
figure

Extended Data Figure 5

Extended Data Fig. 5 | Bayesian methods mitigate impact of in-scanner motion on CBF image quality. The impact of motion on quality differed significantly among quantification approaches (linear mixed effects model, F = 529.13, p = 1.0 × 10−25). The envelope indicates the 95% confidence interval.

datay <- subset(all_data, Datasets != 'FTD')
scbf = datay[datay$CBFTYPE=='SCBF',]
scrub = datay[datay$CBFTYPE=='SCRUB',]
basil = datay[datay$CBFTYPE=='BASIL',]
pvc = datay[datay$CBFTYPE=='PVC',]

cols = c("LINE1"="#e34a33","LINE2"="#3182bd","LINE3"="#d95f02",'LINE4'="#c51b8a")

CBF_Age_plot <- ggplot() + xlim(0, 1.1)+ ylim(0,1.1) +
  #theme(legend.position = "none")  +
  
  theme(axis.title.x = element_text(size = rel(1.2))) +
  theme(axis.title.y = element_text(size = rel(1.2),vjust=-0.9)) + 
  theme(axis.text = element_text(size = rel(1.2))) + theme(axis.line = element_line(colour = 'black', size = 0.5), axis.ticks.length = unit(.25, "cm")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
  #geom_point(data = predicts, aes(x, y), colour = "darksalmon", alpha=0.7 ) +  
  geom_point(data=scbf,aes(x=FD, y=QEI),size=1,color="red",alpha = 2/10)+ 
  geom_point(data=scrub,aes(x=FD, y=QEI),size=1,color="blue",alpha = 2/10)+ 
  geom_point(data=basil,aes(x=FD, y=QEI),size=1,color='darkgreen',alpha = 2/10)+ 
  #geom_point(data=pvc,aes(x=FD, y=QEI),size=1,color="#c51b8a")+ 

  geom_smooth(method='lm',data=scbf, aes(x=FD, y=QEI), color="red")+
  geom_smooth(method='lm',data=scrub,aes(x=FD, y=QEI), color="blue")+
  geom_smooth(method='lm',data=basil,aes(x=FD, y=QEI), color='darkgreen')+
  #geom_smooth(method='lm',data=pvc,aes(x=FD, y=QEI), color="#c51b8a") +
  labs(x = "FD (mm)", y = "QEI",color=cols)+ 
  scale_colour_manual(values=cols) + 
  scale_linetype_manual(values=cols) +
  scale_x_continuous(expand = c(0, 0)) + scale_y_continuous(expand = c(0, 0))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.
CBF_Age_plot
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

dir.create("ExtendedDataFigure5")
## Warning in dir.create("ExtendedDataFigure5"): 'ExtendedDataFigure5' already
## exists
ggsave("ExtendedDataFigure5/ExtendedDataFigure5.pdf", width=5, units="in", height=4,dpi=800)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
write.csv(datay, "ExtendedDataFigure5/ExtendedDataFigure5.csv")

# # do the statistics
pnc.model <- lmerTest::lmer(QEI ~ FD*CBFTYPE + AGE + sex + FD + (1 | sub ), data=datay)

cat("\nModel summary:\n")
## 
## Model summary:
summary(pnc.model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: QEI ~ FD * CBFTYPE + AGE + sex + FD + (1 | sub)
##    Data: datay
## 
## REML criterion at convergence: -26642.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.2828 -0.4013  0.0303  0.4659  4.6672 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  sub      (Intercept) 0.003388 0.05821 
##  Residual             0.005255 0.07249 
## Number of obs: 12804, groups:  sub, 3311
## 
## Fixed effects:
##                   Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)      8.817e-01  3.362e-03  5.969e+03 262.297  < 2e-16 ***
## FD              -4.360e-02  1.104e-02  8.832e+03  -3.950 7.89e-05 ***
## CBFTYPEPVC      -1.382e-02  3.025e-03  9.552e+03  -4.568 4.99e-06 ***
## CBFTYPESCBF     -1.469e-01  2.905e-03  9.431e+03 -50.587  < 2e-16 ***
## CBFTYPESCRUB    -4.127e-02  2.902e-03  9.442e+03 -14.222  < 2e-16 ***
## AGE             -2.823e-03  6.237e-05  3.348e+03 -45.258  < 2e-16 ***
## sexM            -3.438e-02  2.444e-03  3.230e+03 -14.065  < 2e-16 ***
## FD:CBFTYPEPVC    1.050e-02  1.292e-02  9.614e+03   0.813    0.416    
## FD:CBFTYPESCBF  -3.737e-01  1.220e-02  9.437e+03 -30.620  < 2e-16 ***
## FD:CBFTYPESCRUB -1.782e-01  1.218e-02  9.460e+03 -14.640  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##               (Intr) FD     CBFTYPEP CBFTYPESCB CBFTYPESCR AGE    sexM  
## FD            -0.610                                                    
## CBFTYPEPVC    -0.422  0.417                                             
## CBFTYPESCBF   -0.432  0.435  0.479                                      
## CBFTYPESCRU   -0.432  0.437  0.480    0.500                             
## AGE           -0.538 -0.012  0.018    0.000      0.000                  
## sexM          -0.391 -0.005 -0.003    0.000     -0.001      0.153       
## FD:CBFTYPEP    0.320 -0.520 -0.789   -0.372     -0.372      0.002  0.001
## FD:CBFTYPESCB  0.340 -0.551 -0.378   -0.789     -0.394     -0.001  0.001
## FD:CBFTYPESCR  0.341 -0.554 -0.379   -0.395     -0.789      0.001  0.001
##               FD:CBFTYPEP FD:CBFTYPESCB
## FD                                     
## CBFTYPEPVC                             
## CBFTYPESCBF                            
## CBFTYPESCRU                            
## AGE                                    
## sexM                                   
## FD:CBFTYPEP                            
## FD:CBFTYPESCB  0.471                   
## FD:CBFTYPESCR  0.472       0.500
cat("\n\nGet the overall interaction signficance using anova: \n")
## 
## 
## Get the overall interaction signficance using anova:
anova(pnc.model)
## Type III Analysis of Variance Table with Satterthwaite's method
##             Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## FD          2.5023  2.5023     1 3290.6  476.14 < 2.2e-16 ***
## CBFTYPE    16.2833  5.4278     3 9494.6 1032.82 < 2.2e-16 ***
## AGE        10.7643 10.7643     1 3347.7 2048.26 < 2.2e-16 ***
## sex         1.0396  1.0396     1 3230.4  197.81 < 2.2e-16 ***
## FD:CBFTYPE  6.6147  2.2049     3 9532.1  419.56 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjPlot::plot_model(pnc.model, colors = "Set1",type = "int",show.data = TRUE) + theme(axis.title.x = element_text(size = rel(1.6))) +
            theme(axis.title.y = element_text(size = rel(1.6))) +
            theme(axis.text = element_text(size = rel(1.4))) + theme(axis.line = element_line(colour = 'black', size = 1.5), axis.ticks.length = unit(.25, "cm")) +
           theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())  + scale_x_continuous(expand = c(0, 0)) +
           scale_y_continuous(expand = c(0, 0))
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

Extended data figure 6

Extended Data Fig. 6 | CBF of gray and white matter across datasets. The distribution of cerebral blood flow (CBF) within grey matter (GM) and white matter (WM) is displayed for each dataset, for each quantification option: the standard CBF model, BASIL (a), BASIL with partial volume correction (PVC; b), and SCRUB (c). SCRUB could not be applied for the FTD dataset as an ASL timeseries is required; the sequence used for that study provided only a single ∆M image. Boxes within each violin plot indicate interquartile range with the median shown as a white dot.

# CBF GM
datagm = all_data
datagm$CBF.WM =NULL
datagm$TP ='GM'
datagm$CBF=datagm$CBF.GM 
datagm$CBF.GM = NULL 

#CBF WM 
datawm = all_data
datawm$CBF.GM =NULL
datawm$TP ='WM'
datawm$CBF=datawm$CBF.WM  
datawm$CBF.WM = NULL 

datasets = rbind(datagm,datawm)
dir.create("ExtendedDataFigure6")
## Warning in dir.create("ExtendedDataFigure6"): 'ExtendedDataFigure6' already
## exists
write.csv(datasets, "ExtendedDataFigure6/ExtendedDataFigure6.csv")

# BASIL
datasetsy = datasets[datasets$CBFTYPE =='BASIL',]
datasetsy = na.omit(datasetsy)
dodge <- position_dodge(width = 0.5)
basil <- ggplot(datasetsy, aes(x=Datasets,y=CBF,fill=TP),plot = FALSE) + 
  geom_violin(position = dodge,width = 1.4)+
  geom_boxplot(width=.1, outlier.colour=NA, position = dodge) 
basil=basil + theme_classic() + scale_x_discrete(limits = rev) + 
  labs(x = "Datasets", y = "CBF(mL/100g/min)") +
  theme(axis.title.x = element_text(size = rel(1.6))) +
  theme(axis.title.y = element_text(size = rel(1.6),vjust=-0.2)) + ylim(0,140) +
  theme(axis.text = element_text(size = rel(1.4))) + theme(axis.line = element_line(colour = 'black', size = .5)) +
  scale_fill_manual(values=c("#d95f0e","#756bb1")) + theme(legend.position = "none")

# BASIL + PVC
datasetsy = datasets[datasets$CBFTYPE =='PVC',]
datasetsy = na.omit(datasetsy)

dodge <- position_dodge(width = 0.5)
pvc<- ggplot(datasetsy, aes(x=Datasets,y=CBF,fill=TP),plot = FALSE) + 
  geom_violin(position = dodge,width = 1.4)+
  geom_boxplot(width=.1, outlier.colour=NA, position = dodge) 
pvc=pvc + theme_classic() + scale_x_discrete(limits = rev) + 
  labs(x = "Datasets", y = "CBF(mL/100 g/min)") +
  theme(axis.title.x = element_text(size = rel(1.6))) +
  theme(axis.title.y = element_text(size = rel(1.6))) + ylim(0,140) +scale_fill_manual(values=c("#d95f0e","#756bb1"))+
  theme(axis.text = element_text(size = rel(1.4))) + theme(axis.line = element_line(colour = 'black', size = .5)) +
  theme(axis.title.y = element_blank(),axis.ticks.y = element_blank(),axis.text.y = element_blank()) +
theme(legend.position = "none")

# SCRUB
datasetsy = datasets[datasets$CBFTYPE =='SCRUB',]
datasetsy = na.omit(datasetsy)

dodge <- position_dodge(width = 0.5)
scrub <- ggplot(datasetsy, aes(x=Datasets,y=CBF,fill=TP),plot = FALSE) + 
  geom_violin(position = dodge,width = 1.4)+
  geom_boxplot(width=.1, outlier.colour=NA, position = dodge) 
scrub=scrub + theme_classic() + scale_x_discrete(limits = rev) + 
  labs(x = "Datasets", y = "CBF(mL/100 g/min)") +
  theme(axis.title.x = element_text(size = rel(1.6))) + scale_fill_manual(values=c("#d95f0e","#756bb1"))+
  theme(axis.title.y = element_text(size = rel(1.6))) + ylim(0,140) +
  theme(axis.text = element_text(size = rel(1.4))) + theme(axis.line = element_line(colour = 'black', size = .5)) +
theme(axis.title.y = element_blank(),axis.ticks.y = element_blank(),axis.text.y = element_blank()) +
theme(legend.position = "none")

# combine the 3
figure <- ggarrange(basil,pvc,scrub,
                    ncol = 3, nrow = 1,widths=c(2.5,2,1.5) )
## Warning: position_dodge requires non-overlapping x intervals
## Warning: position_dodge requires non-overlapping x intervals
## position_dodge requires non-overlapping x intervals
ggsave("ExtendedDataFigure6/ExtendedDataFigure6.pdf", width=10, units="in", height=3,dpi=800)
figure

Extended data figure 7

Extended Data Fig. 7 | CBF declines nonlinearly with age over the lifespan. Evolution of gray matter CBF with age over the lifespan across all five datasets. For each dataset, we used four methods for quantifying CBF: the standard CBF model (see main text), BASIL (a), BASIL with partial volume correction (PVC; b), and SCRUB (c). We used a generalized additive model with penalized splines to characterize the nonlinear evolution of CBF over age. The thick black line represents the predicted values, while the dashed lines represent the 95% confidence intervals.

kk = 4 # order
dir.create("ExtendedDataFigure7")
## Warning in dir.create("ExtendedDataFigure7"): 'ExtendedDataFigure7' already
## exists
write.csv(all_data, "ExtendedDataFigure7/ExtendedDataFigure7.csv")

# BASIL PLOT
datay = all_data[all_data$CBFTYPE =='BASIL',]
pnc = datay[datay$Datasets=='PNC',]
nki = datay[datay$Datasets=='NKI',]
age = datay[datay$Datasets=='AGE',]
irr = datay[datay$Datasets=='IRR',]
ftd = datay[datay$Datasets=='FTD',]

#############################################################
basil_Age_gam <- gam(CBF.GM ~ AGE + sex + FD + s(AGE, k=kk), method="REML", data = datay)

#####################
## Look at results ##
#####################
#summary(cbf_Age_gam)

## Nonlinear age effect
Age_pval <- summary(basil_Age_gam)$s.table[1,4]
Age_pval
## [1] 0
####################################
## Visualize Nonlinear Age Effect ##
####################################
plotdata <- visreg(basil_Age_gam,'AGE',type = "conditional",scale = "linear", plot = FALSE)
smooths <- data.frame(Variable = plotdata$meta$x, 
                      x=plotdata$fit[[plotdata$meta$x]], 
                      smooth=plotdata$fit$visregFit, 
                      lower=plotdata$fit$visregLwr, 
                      upper=plotdata$fit$visregUpr)
predicts <- data.frame(Variable = "dim1", 
                       x=plotdata$res$AGE,
                       y=plotdata$res$visregRes)

basil_Age_plot <- ggplot() + xlim(8, 85)+ ylim(0,140) +
  #theme(legend.position = "none")  +
  labs(x = "Age (years)", y = "CBF(mL/100g/min)") +
  theme(axis.title.x = element_text(size = rel(1.6))) +
  theme(axis.title.y = element_text(size = rel(1.6),vjust=-1.2)) + 
  theme(axis.text = element_text(size = rel(1.4))) + theme(axis.line = element_line(colour = 'black', size = .5), axis.ticks.length = unit(.25, "cm")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
  geom_point(data = predicts, aes(x, y), colour = "darksalmon", alpha=0.7 ) +  
  #geom_point(data=datay,aes(x=AGE, y=CBF.GM),size=1)+ 
  geom_point(data=pnc,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill='#e34a33', color="white")+ 
  geom_point(data=nki,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#3182bd", color="white")+ 
  geom_point(data=irr,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#31a354", color="white")+ 
  geom_point(data=ftd,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#d95f02", color="white")+ 
  geom_point(data=age,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#c51b8a", color="white")+ 
  geom_line(data = smooths, aes(x = x, y = smooth), colour = "midnightblue",size=1) +
  geom_line(data = smooths, aes(x = x, y=lower), linetype="dashed", colour = "midnightblue", alpha = 0.9, size = 0.9) + 
  geom_line(data = smooths, aes(x = x, y=upper), linetype="dashed",colour = "midnightblue", alpha = 0.9, size = 0.9) +
  coord_cartesian(xlim = c(10.5,85), ylim = c(0,140))

# BASIL
datay = all_data[all_data$CBFTYPE =='PVC',]
pnc = datay[datay$Datasets=='PNC',]
nki = datay[datay$Datasets=='NKI',]
age = datay[datay$Datasets=='AGE',]
irr = datay[datay$Datasets=='IRR',]
ftd = datay[datay$Datasets=='FTD',]

#############################################################
pvc_Age_gam <- gam(CBF.GM ~ AGE + sex + FD + s(AGE, k=kk), method="REML", data = datay)

#####################
## Look at results ##
#####################
#summary(cbf_Age_gam)

## Nonlinear age effect
Age_pval <- summary(pvc_Age_gam)$s.table[1,4]
Age_pval
## [1] 0
####################################
## Visualize Nonlinear Age Effect ##
####################################
plotdata <- visreg(pvc_Age_gam,'AGE',type = "conditional",scale = "linear", plot = FALSE)
smooths <- data.frame(Variable = plotdata$meta$x, 
                      x=plotdata$fit[[plotdata$meta$x]], 
                      smooth=plotdata$fit$visregFit, 
                      lower=plotdata$fit$visregLwr, 
                      upper=plotdata$fit$visregUpr)
predicts <- data.frame(Variable = "dim1", 
                       x=plotdata$res$AGE,
                       y=plotdata$res$visregRes)

pvc_Age_plot <- ggplot() + xlim(8, 85)+ ylim(0,140) +
  #theme(legend.position = "none")  +
  labs(x = "Age (years)", y = "CBF(mL/100 g/min)") +
  theme(axis.title.x = element_text(size = rel(1.6))) +
  theme(axis.title.y = element_text(size = rel(1.6))) + 
  theme(axis.text = element_text(size = rel(1.4))) + theme(axis.line = element_line(colour = 'black', size = .5), axis.ticks.length = unit(.25, "cm")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
  geom_point(data = predicts, aes(x, y), colour = "darksalmon", alpha=0.7 ) +  
  #geom_point(data=datay,aes(x=AGE, y=CBF.GM),size=1)+ 
  geom_point(data=pnc,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill='#e34a33', color="white")+ 
  geom_point(data=nki,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#3182bd", color="white")+ 
  geom_point(data=irr,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#31a354", color="white")+ 
  geom_point(data=ftd,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#d95f02", color="white")+ 
  geom_point(data=age,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#c51b8a", color="white")+ 
  geom_line(data = smooths, aes(x = x, y = smooth), colour = "midnightblue",size=1) +
  geom_line(data = smooths, aes(x = x, y=lower), linetype="dashed", colour = "midnightblue", alpha = 0.9, size = 0.9) + 
  geom_line(data = smooths, aes(x = x, y=upper), linetype="dashed",colour = "midnightblue", alpha = 0.9, size = 0.9) +
  coord_cartesian(xlim = c(10.5,85), ylim = c(0,140)) +
   theme(axis.title.y = element_blank(),axis.ticks.y = element_blank(),axis.text.y = element_blank())


# SCRUB
datay = all_data[all_data$CBFTYPE =='SCRUB',]
pnc = datay[datay$Datasets=='PNC',]
nki = datay[datay$Datasets=='NKI',]
age = datay[datay$Datasets=='AGE',]
irr = datay[datay$Datasets=='IRR',]
ftd = datay[datay$Datasets=='FTD',]

#############################################################
scrub_Age_gam <- gam(CBF.GM ~ AGE + sex + FD + s(AGE, k=kk), method="REML", data = datay)

#####################
## Look at results ##
#####################
#summary(cbf_Age_gam)

## Nonlinear age effect
Age_pval <- summary(scrub_Age_gam)$s.table[1,4]
Age_pval
## [1] 0
####################################
## Visualize Nonlinear Age Effect ##
####################################
plotdata <- visreg(scrub_Age_gam,'AGE',type = "conditional",scale = "linear", plot = FALSE)
smooths <- data.frame(Variable = plotdata$meta$x, 
                      x=plotdata$fit[[plotdata$meta$x]], 
                      smooth=plotdata$fit$visregFit, 
                      lower=plotdata$fit$visregLwr, 
                      upper=plotdata$fit$visregUpr)
predicts <- data.frame(Variable = "dim1", 
                       x=plotdata$res$AGE,
                       y=plotdata$res$visregRes)

scrub_Age_plot <- ggplot() + xlim(8, 85)+ ylim(0,140) +
  #theme(legend.position = "none")  +
  labs(x = "Age (years)", y = "CBF(mL/100 g/min)") +
  theme(axis.title.x = element_text(size = rel(1.6))) +
  theme(axis.title.y = element_text(size = rel(1.6))) + 
  theme(axis.text = element_text(size = rel(1.4))) + theme(axis.line = element_line(colour = 'black', size = .5), axis.ticks.length = unit(.25, "cm")) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
  geom_point(data = predicts, aes(x, y), colour = "darksalmon", alpha=0.7 ) +  
  #geom_point(data=datay,aes(x=AGE, y=CBF.GM),size=1)+ 
  geom_point(data=pnc,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill='#e34a33', color="white")+ 
  geom_point(data=nki,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#3182bd", color="white")+ 
  geom_point(data=irr,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#31a354", color="white")+ 
  #geom_point(data=ftd,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#d95f02", color="white")+ 
  geom_point(data=age,aes(x=AGE, y=CBF.GM),shape=21,size=2,fill="#c51b8a", color="white")+ 
  geom_line(data = smooths, aes(x = x, y = smooth), colour = "midnightblue",size=1) +
  geom_line(data = smooths, aes(x = x, y=lower), linetype="dashed", colour = "midnightblue", alpha = 0.9, size = 0.9) + 
  geom_line(data = smooths, aes(x = x, y=upper), linetype="dashed",colour = "midnightblue", alpha = 0.9, size = 0.9) +
  coord_cartesian(xlim = c(10.5,85), ylim = c(0,140)) +
theme(axis.title.y = element_blank(),axis.ticks.y = element_blank(),axis.text.y = element_blank())

figure <- ggarrange(basil_Age_plot,pvc_Age_plot,scrub_Age_plot,
                    ncol = 3, nrow = 1,widths=c(2.3,2,2) )
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 8 rows containing missing values (geom_point).
## Warning: Removed 3 row(s) containing missing values (geom_path).
## Removed 3 row(s) containing missing values (geom_path).
## Removed 3 row(s) containing missing values (geom_path).
## Warning: Removed 6 rows containing missing values (geom_point).
## Removed 6 rows containing missing values (geom_point).
## Warning: Removed 3 row(s) containing missing values (geom_path).
## Removed 3 row(s) containing missing values (geom_path).
## Removed 3 row(s) containing missing values (geom_path).
## Warning: Removed 9 rows containing missing values (geom_point).
## Removed 9 rows containing missing values (geom_point).
## Warning: Removed 3 row(s) containing missing values (geom_path).
## Removed 3 row(s) containing missing values (geom_path).
## Removed 3 row(s) containing missing values (geom_path).
ggsave("ExtendedDataFigure7/ExtendedDataFigure7.pdf", width=10, units="in", height=4,dpi=800)
figure